Observed data
Observed log-chlorophyll at representative station in SF Bay Delta region.
library(tidyverse)
library(lubridate)
library(mgcv)
library(plotly)
library(WRTDStidal)
library(gridExtra)
source('R/funcs.R')
# flow data, left moving window average of 30 days
data(sf_fldat)
fl_dat <- sf_fldat %>%
rename(date = Date) %>%
filter(station %in% 'sac') %>%
mutate(
qsm = stats::filter(q, rep(1, 30)/30, sides = 1, method = 'convolution')
)
# format the data to model
data(sf_dat)
sf_mod <- sf_dat %>%
filter(Site_Code %in% 'C3') %>%
rename(date = Date) %>%
mutate(
doy = yday(date),
dec_time = decimal_date(date),
yr = year(date),
mo = month(date, label = T)
) %>%
left_join(fl_dat, by = 'date') %>%
mutate( # all variables ln-transformed
flo = log(qsm),
chl = log(chl),
tss = log(tss),
nh = log(nh)
) %>%
select(-q, -qsm, -station, -Latitude, -Longitude, -Location)
# plot, all
p <- ggplot(sf_mod, aes(x = date, y = chl)) +
geom_line() +
theme_bw()
ggplotly(p)
# boxplot, by years
p <- ggplot(sf_mod, aes(x = yr, y = chl)) +
geom_boxplot() +
theme_bw()
ggplotly(p)
# boxplot, by month
p <- ggplot(sf_mod, aes(x = mo, y = chl)) +
geom_boxplot() +
theme_bw()
ggplotly(p)
GAMs with annual, seasonal trends
Some simple GAMs to explore annual, seasonal trends.
# smooths to evaluate
smths <- c(
"s(dec_time, bs = 'tp')",
"s(doy, bs = 'cc')",
"te(dec_time, doy, bs = c('tp', 'cc'))"
)
# get all combinations of smoothers to model, one to many
frms <- list()
for(i in seq_along(smths)){
frm <- combn(smths, i) %>%
apply(2, function(x){
paste(x, collapse = ' + ') %>%
paste('chl ~ ', .) %>%
formula
})
frms <- c(frms, frm)
}
# create models from smooth formula combinations
mods <- map(frms, function(frm){
gam(frm,
knots = list(doy = c(1, 366)),
data = sf_mod,
na.action = na.exclude
)
})
names(mods) <- paste0('mod', seq_along(mods))
Summary stats of annual, seasonal models:
# smoother stats of GAMs
map(mods, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>%
enframe %>%
unnest %>%
kable
| mod1 |
s(dec_time) |
6.866894 |
7.962040 |
29.957511 |
0.0000000 |
| mod2 |
s(doy) |
3.191626 |
8.000000 |
12.600387 |
0.0000000 |
| mod3 |
te(dec_time,doy) |
13.362860 |
15.974582 |
27.262699 |
0.0000000 |
| mod4 |
s(dec_time) |
7.272455 |
8.281637 |
35.566876 |
0.0000000 |
| mod4 |
s(doy) |
3.591968 |
8.000000 |
18.279046 |
0.0000000 |
| mod5 |
s(dec_time) |
7.347362 |
8.329118 |
18.745688 |
0.0000000 |
| mod5 |
te(dec_time,doy) |
8.792719 |
15.000000 |
11.089349 |
0.0000000 |
| mod6 |
s(doy) |
2.933188 |
8.000000 |
2.355489 |
0.0000000 |
| mod6 |
te(dec_time,doy) |
10.642911 |
12.982207 |
22.279284 |
0.0000000 |
| mod7 |
s(dec_time) |
7.332864 |
8.308760 |
9.278011 |
0.0000000 |
| mod7 |
s(doy) |
3.385611 |
8.000000 |
5.284500 |
0.0000000 |
| mod7 |
te(dec_time,doy) |
6.637691 |
15.000000 |
1.417708 |
0.0003557 |
# summary stats of GAMs
map(mods, ~ data.frame(
AIC = AIC(.x),
R2 = summary(.x)$r.sq)) %>%
enframe %>%
unnest %>%
kable
| mod1 |
1117.9219 |
0.3025777 |
| mod2 |
1215.8161 |
0.1606211 |
| mod3 |
995.0956 |
0.4490483 |
| mod4 |
986.8771 |
0.4548302 |
| mod5 |
972.2099 |
0.4741757 |
| mod6 |
992.1514 |
0.4522078 |
| mod7 |
971.9230 |
0.4755770 |
# prediction data
pred_dat <- data.frame(
dec_time = seq(min(sf_mod$dec_time), max(sf_mod$dec_time), length = 1000)
) %>%
mutate(
date = date_decimal(dec_time),
date = as.Date(date),
mo = month(date, label = TRUE),
doy = yday(date),
yr = year(date)
) %>%
left_join(., fl_dat[, c('date', 'qsm')]) %>%
mutate(flo = log(qsm)) %>%
select(-qsm)
# predictions
sf_res <- map(mods, function(x){
pred_dat %>%
mutate(
pred = predict(x, newdata = pred_dat)
)
}) %>%
enframe('mods') %>%
unnest
# plot
p <- ggplot(sf_res, aes(x = date)) +
geom_point(data = sf_mod, aes(y = chl), size = 0.5) +
geom_line(aes(y = pred, colour = mods)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
)
ggplotly(p)
# plot
p <- ggplot(sf_res, aes(x = doy, group = factor(yr), colour = yr)) +
geom_line(aes(y = pred)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
) +
facet_wrap(~ mods, ncol = 2)
ggplotly(p)
GAMs with annual, seasonal, and flow trends
Adding flow data to the model:
# smooths to evaluate
smths <- c(
"s(dec_time, bs = 'tp')",
"s(doy, bs = 'cc')",
"s(flo, bs = 'tp')",
"te(flo, doy, bs = c('tp', 'cc'))",
"te(flo, dec_time, bs = c('tp', 'tp'))",
"te(dec_time, doy, bs = c('tp', 'cc'))",
"te(dec_time, doy, flo, bs = c('tp', 'cc', 'tp'))"
)
# get all combinations of smoothers to model, one to many
frms <- list()
for(i in seq_along(smths)){
frm <- combn(smths, i) %>%
apply(2, function(x){
paste(x, collapse = ' + ') %>%
paste('chl ~ ', .) %>%
formula
})
frms <- c(frms, frm)
}
# create models from smooth formula combinations
mods2 <- map(frms, function(frm){
gam(frm,
knots = list(doy = c(1, 366)),
data = sf_mod,
na.action = na.exclude
)
})
names(mods2) <- paste0('mod', seq_along(mods2))
save(mods2, file = 'data/sf_mods2.RData', compress = 'xz')
Summary stats of best year/season model, year/season/flow model
data(sf_mods2)
# best model with only season, year
best1 <- map(mods, ~ summary(.x)$r.sq) %>%
unlist %>%
which.max %>%
mods[[.]]
# best model with season, year, flow
best2 <- map(mods2, ~ summary(.x)$r.sq) %>%
unlist %>%
which.max %>%
mods2[[.]]
best <- list(best1 = best1, best2 = best2)
# smoother stats of GAMs
map(best, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>%
enframe %>%
unnest %>%
kable
| best1 |
s(dec_time) |
7.332864 |
8.308760 |
9.278011 |
0.0000000 |
| best1 |
s(doy) |
3.385611 |
8.000000 |
5.284500 |
0.0000000 |
| best1 |
te(dec_time,doy) |
6.637691 |
15.000000 |
1.417708 |
0.0003557 |
| best2 |
s(dec_time) |
6.447441 |
7.549442 |
4.651559 |
0.0000330 |
| best2 |
s(flo) |
3.199187 |
4.047691 |
1.931167 |
0.0935574 |
| best2 |
te(flo,dec_time) |
11.199615 |
16.000000 |
1.846377 |
0.0000008 |
| best2 |
te(dec_time,doy,flo) |
37.046429 |
75.000000 |
3.690058 |
0.0000000 |
# summary stats of GAMs
map(best, ~ data.frame(
AIC = AIC(.x),
R2 = summary(.x)$r.sq)) %>%
enframe %>%
unnest %>%
kable
| best1 |
971.9230 |
0.4755770 |
| best2 |
880.8418 |
0.5852104 |
# predictions
sf_res2 <- map(best, function(x){
pred_dat %>%
mutate(
pred = predict(x, newdata = pred_dat)
)
}) %>%
enframe('mods') %>%
unnest
# plot
p <- ggplot(sf_res2, aes(x = date)) +
geom_point(data = sf_mod, aes(y = chl), size = 0.5) +
geom_line(aes(y = pred, colour = mods)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
)
ggplotly(p)
ptheme <- theme(
axis.title.x = element_blank(),
axis.title.y = element_blank()
)
cols <- 'Spectral'
pb1 <- dynagam(best1, pred_dat, ncol = 1, col_vec = cols) +
ptheme +
theme(legend.position = 'none') +
ggtitle('Best 1')
pb2 <- dynagam(best2, pred_dat, ncol = 1, col_vec = cols) +
ptheme +
ggtitle('Best2')
pleg <- g_legend(pb2)
pb2 <- pb2 +
theme(legend.position = 'none')
grid.arrange(
pleg,
arrangeGrob(pb1, pb2, ncol = 2, bottom = 'lnQ', left = 'lnchl'),
ncol = 1,
heights = c(0.1, 1)
)

Adding nitrogen and turbidity covariates
# formula for best annual, seasonal, flow model
strt <- best2$formula %>%
as.character
smths <- c(
"s(nh, bs = 'tp')",
"s(tss, bs = 'tp')",
"te(nh, tss, bs = c('tp', 'tp'))"
)
# get all combinations of smoothers to model, one to many
frmstab <- list()
frms <- list()
for(i in seq_along(smths)){
# for the summary table
frmtab <- combn(smths, i) %>%
apply(2, function(x){
paste(x, collapse = ' + ')
})
# for the model
frm <- sapply(frmtab, function(x){
paste('chl ~', strt[3], '+', x) %>%
formula
})
frmstab <- c(frmstab, frmtab)
frms <- c(frms, frm)
}
# create models from smooth formula combinations
mods3 <- map(frms, function(frm){
gam(frm,
knots = list(doy = c(1, 366)),
data = sf_mod,
na.action = na.exclude
)
})
names(mods3) <- paste0('mod', seq_along(mods3))
Summary of all nutrient, tss models:
# smoother stats of GAMs
map(mods3, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>%
enframe %>%
unnest %>%
kable
| mod1 |
s(dec_time) |
6.7326765 |
7.787103 |
7.4425304 |
0.0000000 |
| mod1 |
s(flo) |
3.1028428 |
3.960042 |
3.0934249 |
0.0128012 |
| mod1 |
te(flo,dec_time) |
11.1034028 |
16.000000 |
1.7440653 |
0.0000018 |
| mod1 |
te(dec_time,doy,flo) |
36.5082544 |
75.000000 |
3.7440972 |
0.0000000 |
| mod1 |
s(nh) |
1.0000000 |
1.000000 |
10.7589384 |
0.0011106 |
| mod2 |
s(dec_time) |
6.7631403 |
7.808429 |
3.5705014 |
0.0006671 |
| mod2 |
s(flo) |
4.5555525 |
5.606869 |
0.5160503 |
0.8094488 |
| mod2 |
te(flo,dec_time) |
6.3101387 |
16.000000 |
0.8707123 |
0.0001731 |
| mod2 |
te(dec_time,doy,flo) |
29.4900693 |
75.000000 |
3.2696992 |
0.0000000 |
| mod2 |
s(tss) |
4.4643812 |
5.534840 |
6.3307603 |
0.0000057 |
| mod3 |
s(dec_time) |
7.1607042 |
8.159000 |
3.0195017 |
0.0021427 |
| mod3 |
s(flo) |
1.0000000 |
1.000000 |
1.0117026 |
0.3149976 |
| mod3 |
te(flo,dec_time) |
6.7124424 |
16.000000 |
0.9168085 |
0.0003409 |
| mod3 |
te(dec_time,doy,flo) |
40.9268267 |
75.000000 |
3.6832771 |
0.0000000 |
| mod3 |
te(nh,tss) |
2.9999967 |
3.000000 |
10.2798312 |
0.0000014 |
| mod4 |
s(dec_time) |
7.0804950 |
8.099234 |
3.0429996 |
0.0021289 |
| mod4 |
s(flo) |
1.0000000 |
1.000000 |
1.0629383 |
0.3030597 |
| mod4 |
te(flo,dec_time) |
6.8780322 |
16.000000 |
0.9204817 |
0.0003869 |
| mod4 |
te(dec_time,doy,flo) |
41.2381557 |
75.000000 |
3.8211715 |
0.0000000 |
| mod4 |
s(nh) |
1.0000000 |
1.000000 |
1.6753508 |
0.1961569 |
| mod4 |
s(tss) |
1.0000000 |
1.000000 |
20.7089185 |
0.0000067 |
| mod5 |
s(dec_time) |
7.1604176 |
8.153588 |
2.8342646 |
0.0038231 |
| mod5 |
s(flo) |
1.0000000 |
1.000000 |
0.8126575 |
0.3677865 |
| mod5 |
te(flo,dec_time) |
6.3716705 |
16.000000 |
0.8176419 |
0.0006966 |
| mod5 |
te(dec_time,doy,flo) |
40.9862091 |
75.000000 |
3.6871420 |
0.0000000 |
| mod5 |
s(nh) |
2.9416732 |
3.677488 |
0.6073391 |
0.6762830 |
| mod5 |
te(nh,tss) |
2.2871431 |
20.000000 |
0.8529033 |
0.0000035 |
| mod6 |
s(dec_time) |
6.8672780 |
7.929956 |
2.9102143 |
0.0048115 |
| mod6 |
s(flo) |
1.0000001 |
1.000000 |
0.5453671 |
0.4605768 |
| mod6 |
te(flo,dec_time) |
6.8057389 |
16.000000 |
0.9049012 |
0.0004014 |
| mod6 |
te(dec_time,doy,flo) |
40.6963145 |
75.000000 |
3.6503971 |
0.0000000 |
| mod6 |
s(tss) |
3.7996742 |
4.785158 |
6.0630463 |
0.0000382 |
| mod6 |
te(nh,tss) |
0.1672564 |
20.000000 |
0.0093602 |
0.2632117 |
| mod7 |
s(dec_time) |
7.0804950 |
8.099234 |
3.0429996 |
0.0021289 |
| mod7 |
s(flo) |
1.0000000 |
1.000000 |
1.0629383 |
0.3030597 |
| mod7 |
te(flo,dec_time) |
6.8780322 |
16.000000 |
0.9204817 |
0.0003869 |
| mod7 |
te(dec_time,doy,flo) |
41.2381557 |
75.000000 |
3.8211715 |
0.0000000 |
| mod7 |
s(nh) |
1.0000000 |
1.000000 |
1.6753508 |
0.1961569 |
| mod7 |
s(tss) |
1.0000000 |
1.000000 |
20.7089185 |
0.0000067 |
| mod7 |
te(nh,tss) |
0.0000000 |
16.000000 |
0.0000000 |
1.0000000 |
# summary stats of GAMs
map(mods3, ~ data.frame(
AIC = AIC(.x),
R2 = summary(.x)$r.sq)) %>%
enframe %>%
unnest %>%
mutate(smth_added = frmstab) %>%
select(name, smth_added, everything()) %>%
kable
| mod1 |
s(nh, bs = ‘tp’) |
868.6768 |
0.5897820 |
| mod2 |
s(tss, bs = ‘tp’) |
854.2826 |
0.5884729 |
| mod3 |
te(nh, tss, bs = c(‘tp’, ‘tp’)) |
843.6342 |
0.5967456 |
| mod4 |
s(nh, bs = ‘tp’) + s(tss, bs = ‘tp’) |
842.2550 |
0.5973803 |
| mod5 |
s(nh, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) |
845.4859 |
0.5966324 |
| mod6 |
s(tss, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) |
843.7245 |
0.5970284 |
| mod7 |
s(nh, bs = ‘tp’) + s(tss, bs = ‘tp’) + te(nh, tss, bs = c(‘tp’, ‘tp’)) |
842.2550 |
0.5973803 |
Summary stats of best three three models:
# best model with season, year, flow
best3 <- map(mods3, ~ summary(.x)$r.sq) %>%
unlist %>%
which.max %>%
mods3[[.]]
best <- list(best1 = best1, best2 = best2, best3 = best3)
# smoother stats of GAMs
map(best, ~ summary(.x)$s.table %>% data.frame %>% rownames_to_column('smoother')) %>%
enframe %>%
unnest %>%
kable
| best1 |
s(dec_time) |
7.332864 |
8.308760 |
9.2780106 |
0.0000000 |
| best1 |
s(doy) |
3.385611 |
8.000000 |
5.2844996 |
0.0000000 |
| best1 |
te(dec_time,doy) |
6.637691 |
15.000000 |
1.4177080 |
0.0003557 |
| best2 |
s(dec_time) |
6.447441 |
7.549442 |
4.6515592 |
0.0000330 |
| best2 |
s(flo) |
3.199187 |
4.047691 |
1.9311669 |
0.0935574 |
| best2 |
te(flo,dec_time) |
11.199615 |
16.000000 |
1.8463772 |
0.0000008 |
| best2 |
te(dec_time,doy,flo) |
37.046429 |
75.000000 |
3.6900576 |
0.0000000 |
| best3 |
s(dec_time) |
7.080495 |
8.099234 |
3.0429996 |
0.0021289 |
| best3 |
s(flo) |
1.000000 |
1.000000 |
1.0629383 |
0.3030597 |
| best3 |
te(flo,dec_time) |
6.878032 |
16.000000 |
0.9204817 |
0.0003869 |
| best3 |
te(dec_time,doy,flo) |
41.238156 |
75.000000 |
3.8211715 |
0.0000000 |
| best3 |
s(nh) |
1.000000 |
1.000000 |
1.6753508 |
0.1961569 |
| best3 |
s(tss) |
1.000000 |
1.000000 |
20.7089185 |
0.0000067 |
| best3 |
te(nh,tss) |
0.000000 |
16.000000 |
0.0000000 |
1.0000000 |
# summary stats of GAMs
map(best, ~ data.frame(
AIC = AIC(.x),
R2 = summary(.x)$r.sq)) %>%
enframe %>%
unnest %>%
kable
| best1 |
971.9230 |
0.4755770 |
| best2 |
880.8418 |
0.5852104 |
| best3 |
842.2550 |
0.5973803 |
# predictions
sf_res3 <- map(best, function(x){
sf_mod %>%
mutate(
pred = predict(x)
)
}) %>%
enframe('mods') %>%
unnest
# plot
p <- ggplot(sf_res3, aes(x = date)) +
geom_point(data = sf_mod, aes(y = chl), size = 0.5) +
geom_line(aes(y = pred, colour = mods)) +
theme_bw() +
theme(
legend.position = 'top',
legend.title = element_blank()
)
ggplotly(p)